#### Appendix 7.4: Propagating Uncertainty of Hawkishness Measures in Topic Analysis ####

source("./code/loadPackages.R") # Install and load all necessary packages

#### Load necessary data ####
mad = fread("./data/dictionaryData.csv")    # Counts of key words based on dictionary
mad$admin = factor(mad$admin, levels=c("Truman", "Eisenhower", "Kennedy", "Johnson",
                                          "Nixon", "Ford", "Carter", "Reagan"))

nscd_all = fread("./data/meetingData.csv")  # Meeting-level data

# Regressions for hawkishness data from each iteration
violence_est = threat_est = balance_est = diplo_est = adversary_est = list()
for (i in 1:1000) {
  onecol = paste0("hawkBoot", i)
  
  submad = mad |> dplyr::select(starts_with("prop"), formal, doctype, admin, hawk = all_of(onecol))
  
  violence_all = lm(propViolence ~ hawk + formal + I(doctype=="Transcript") + factor(admin), data=submad)
  threat_all = lm(propThreat ~ hawk + formal + I(doctype=="Transcript") + factor(admin), data=submad)
  balance_all = lm(propBalance ~ hawk + formal + I(doctype=="Transcript") + factor(admin), data=submad)
  diplo_all = lm(propDiplo ~ hawk + formal + I(doctype=="Transcript") + factor(admin), data=submad)
  adversary_all = lm(propAdv ~ hawk + formal + I(doctype=="Transcript") + factor(admin), data=submad)
  
  violence_est[[i]] = tidy(violence_all)
  threat_est[[i]] = tidy(threat_all)
  balance_est[[i]] = tidy(balance_all)
  diplo_est[[i]] = tidy(diplo_all)
  adversary_est[[i]] = tidy(adversary_all)
  
  if (i %% 25==0) {paste0("Iteration ", message(i), " of 1,000 analyzed.")}
}

violence_out = rbindlist(violence_est)
threat_out = rbindlist(threat_est)
balance_out = rbindlist(balance_est)
diplo_out = rbindlist(diplo_est)
adversary_out = rbindlist(adversary_est)

topic_ests = list(violence_out, threat_out, balance_out, diplo_out, adversary_out)
topic_names = c("violence", "threat", "balance", "diplo", "adversary")

# Covariates
terms = c("hawk", "formal", "I(doctype == \"Transcript\")TRUE",
          "factor(admin)Eisenhower", "factor(admin)Kennedy", "factor(admin)Johnson",            
          "factor(admin)Nixon", "factor(admin)Ford", "factor(admin)Carter", "factor(admin)Reagan", "(Intercept)")

# Combine results
topicResultsList = list()
for (i in 1:length(topic_ests)) {
  one_out = topic_ests[[i]]
  
  termList = list()
  for (j in 1:length(terms)) {
    oneterm = one_out |> filter(term==terms[j]) |> dplyr::select(estimate, std.error)
    
    termstats = oneterm |> summarize(est_mean = mean(estimate), est_lower = quantile(estimate, 0.025), est_upper = quantile(estimate, 0.975),
                                     se_mean = mean(std.error), se_lower = quantile(std.error, 0.025), se_upper = quantile(std.error, 0.975))
    
    termList[[j]] = data.frame(term=terms[j], termstats)  
  }
  allTerms = rbindlist(termList) 
  
  topicResultsList[[i]] = data.frame(topic=topic_names[i], allTerms)
}

topicResults = rbindlist(topicResultsList)
topicResults$topic = factor(topicResults$topic, levels=topic_names) 
topicResults$signif = ifelse(topicResults$est_lower > 0 | topicResults$est_upper < 0, "Yes", "No")


#### Make coefficient tables ####
# First, need to get coefficient estimates
ceList = list()

for (i in 1:length(terms)) {
  oneterm = topicResults |> filter(term==terms[i])
  
  onetermest = sprintf("%.3f", round(oneterm$est_mean, 3))
  onetermse = sprintf("%.3f", round(oneterm$se_mean, 3))
  
  # Add another digit if numbers are too small
  onetermest = ifelse(onetermest=="0.000", sprintf("%.4f", round(oneterm$est_mean, 4)), onetermest)
  onetermse = ifelse(onetermse=="0.000", sprintf("%.4f", round(oneterm$se_mean, 4)), onetermse)
  
  # Put parentheses around the standard errors
  onetermse = paste0("(", onetermse, ")")
  
  # Find significance 
  onelower90 = oneterm$est_mean - qnorm(0.95)*oneterm$se_mean
  oneupper90 = oneterm$est_mean + qnorm(0.95)*oneterm$se_mean  
  
  onelower95 = oneterm$est_mean - qnorm(0.975)*oneterm$se_mean
  oneupper95 = oneterm$est_mean + qnorm(0.975)*oneterm$se_mean  
  
  onelower99 = oneterm$est_mean - qnorm(0.995)*oneterm$se_mean
  oneupper99 = oneterm$est_mean + qnorm(0.995)*oneterm$se_mean  
  
  stars = ifelse(onelower99 > 0 | oneupper99 < 0, "^{***}",
                 ifelse(onelower95 > 0 | oneupper95 < 0, "^{**}",
                        ifelse(onelower90 > 0 | oneupper90 < 0, "^{*}", "")))
  onetermest_star = paste0(onetermest, stars) 
  
  # Put things together for one term  
  onetermres = data.frame(rbind(t(onetermest_star), t(onetermse)))
  onetermres$var = c(terms[i], "")
  onetermres = onetermres |> dplyr::select(var, everything())
  
  ceList[[i]] = onetermres
  
}
ceOut = rbindlist(ceList)

# Rename the variables
ceOut$var = factor(ceOut$var, levels=c("hawk", "formal", "I(doctype == \"Transcript\")TRUE",
                                       "factor(admin)Eisenhower", "factor(admin)Kennedy", "factor(admin)Johnson",            
                                       "factor(admin)Nixon", "factor(admin)Ford", "factor(admin)Carter", "factor(admin)Reagan", "(Intercept)"),
                   labels=c("Speaker Hawkishness", "Formal", "Transcript", "Eisenhower", "Kennedy", "Johnson", "Nixon", "Ford", "Carter", "Reagan", "Constant"))

# Add topic headers to the table
names(ceOut) = c("", "Violence", "Threat", "Balance", "Diplomacy", "Adversary") 

# Number of observations (same across all analyses)
nobs(violence_all)

#### Table A43: Hawkishness and Speech Act Content, Propagating Uncertainty from Bootstrapping ####
print(xtable(ceOut), sanitize.text.function=identity, include.rownames=F) 
